import re
from kmp import *

# Path of the idiom data file opened by search_idioms_in_text; being a
# relative path, it is resolved against the current working directory.
data_file = "data/data.txt"

def search_idioms_in_text(text: str) -> list:
    """Return info for every idiom in the data file that occurs in ``text``.

    Only data lines containing a record of the form
    ``<digits>,"idiom","pinyin","translation","source","abbreviation"``
    are considered; any other line is skipped. Each remaining idiom is
    looked up in ``text`` with ``kmp_search`` and reported when that search
    yields at least one result.

    Args:
        text: The text to scan for idioms.

    Returns:
        A list of dicts with the keys ``"idiom"``, ``"pinyin"``,
        ``"translation"`` and ``"source"``, one per matching data line, in
        the order the lines appear in the data file.

    Raises:
        OSError: If ``data_file`` cannot be opened.
    """
    # Compile once up front instead of passing the pattern string per line.
    record_pattern = re.compile(
        r'(\d+),"([^"]*)","([^"]*)","([^"]*)","([^"]*)","([^"]*)"'
    )

    found = []
    # Stream the data file line by line rather than loading it all first.
    with open(data_file, 'r', encoding='utf-8') as f:
        for line in f:
            record = record_pattern.search(line.strip())
            if record is None:
                continue

            # The index and abbreviation fields are not part of the result.
            _, idiom, pinyin, translation, source, _ = record.groups()

            # The pattern permits an empty idiom field; an empty idiom can
            # never be a meaningful hit, so it is not searched for at all.
            if not idiom:
                continue

            # Use the KMP algorithm to look for the idiom in the text.
            occurrences = kmp_search(text, idiom)
            if len(occurrences) > 0:
                found.append({
                    "idiom": idiom,
                    "pinyin": pinyin,
                    "translation": translation,
                    "source": source,
                })

    return found
